In [1]:
import os
import numpy as np
import pandas as pd
import random
import seaborn as sns

import datetime as datetime
import matplotlib.dates as dates
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from contextlib import contextmanager
from time import time
from tqdm import tqdm
import lightgbm as lgbm

from sklearn.metrics import classification_report, log_loss, accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
In [2]:
pip install seaborn
Collecting seaborn
  Downloading seaborn-0.11.2-py3-none-any.whl (292 kB)
Requirement already satisfied: pandas>=0.23 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from seaborn) (1.4.2)
Requirement already satisfied: matplotlib>=2.2 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from seaborn) (3.5.1)
Requirement already satisfied: scipy>=1.0 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from seaborn) (1.8.0)
Requirement already satisfied: numpy>=1.15 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from seaborn) (1.22.3)
Requirement already satisfied: fonttools>=4.22.0 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (4.31.2)
Requirement already satisfied: pyparsing>=2.2.1 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (3.0.4)
Requirement already satisfied: cycler>=0.10 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (0.11.0)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (2.8.2)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (1.4.2)
Requirement already satisfied: pillow>=6.2.0 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (9.1.0)
Requirement already satisfied: packaging>=20.0 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from matplotlib>=2.2->seaborn) (21.3)
Requirement already satisfied: pytz>=2020.1 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from pandas>=0.23->seaborn) (2022.1)
Requirement already satisfied: six>=1.5 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from python-dateutil>=2.7->matplotlib>=2.2->seaborn) (1.16.0)
Installing collected packages: seaborn
Successfully installed seaborn-0.11.2
Note: you may need to restart the kernel to use updated packages.
In [4]:
pip install tqdm
Collecting tqdm
  Downloading tqdm-4.63.1-py2.py3-none-any.whl (76 kB)
Requirement already satisfied: colorama in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from tqdm) (0.4.4)
Installing collected packages: tqdm
Successfully installed tqdm-4.63.1
Note: you may need to restart the kernel to use updated packages.
In [2]:
pip install lightgbm
Collecting lightgbm
  Downloading lightgbm-3.3.2-py3-none-win_amd64.whl (1.0 MB)
Requirement already satisfied: scikit-learn!=0.22.0 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from lightgbm) (1.0.2)
Requirement already satisfied: numpy in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from lightgbm) (1.22.3)
Requirement already satisfied: scipy in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from lightgbm) (1.8.0)
Requirement already satisfied: wheel in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from lightgbm) (0.37.1)
Requirement already satisfied: joblib>=0.11 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from scikit-learn!=0.22.0->lightgbm) (1.1.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\krudko\.conda\envs\krudkoenv\lib\site-packages (from scikit-learn!=0.22.0->lightgbm) (3.1.0)
Installing collected packages: lightgbm
Successfully installed lightgbm-3.3.2
Note: you may need to restart the kernel to use updated packages.
In [2]:
data = pd.read_csv("upload_DJIA_table.csv")
data
Out[2]:
Date Open High Low Close Volume Adj Close
0 2016-07-01 17924.240234 18002.380859 17916.910156 17949.369141 82160000 17949.369141
1 2016-06-30 17712.759766 17930.609375 17711.800781 17929.990234 133030000 17929.990234
2 2016-06-29 17456.019531 17704.509766 17456.019531 17694.679688 106380000 17694.679688
3 2016-06-28 17190.509766 17409.720703 17190.509766 17409.720703 112190000 17409.720703
4 2016-06-27 17355.210938 17355.210938 17063.080078 17140.240234 138740000 17140.240234
... ... ... ... ... ... ... ...
1984 2008-08-14 11532.070312 11718.280273 11450.889648 11615.929688 159790000 11615.929688
1985 2008-08-13 11632.809570 11633.780273 11453.339844 11532.959961 182550000 11532.959961
1986 2008-08-12 11781.700195 11782.349609 11601.519531 11642.469727 173590000 11642.469727
1987 2008-08-11 11729.669922 11867.110352 11675.530273 11782.349609 183190000 11782.349609
1988 2008-08-08 11432.089844 11759.959961 11388.040039 11734.320312 212830000 11734.320312

1989 rows × 7 columns

In [3]:
data['Date']=pd.to_datetime(data['Date'])
data=data.sort_values('Date').reset_index(drop=True)
data
Out[3]:
Date Open High Low Close Volume Adj Close
0 2008-08-08 11432.089844 11759.959961 11388.040039 11734.320312 212830000 11734.320312
1 2008-08-11 11729.669922 11867.110352 11675.530273 11782.349609 183190000 11782.349609
2 2008-08-12 11781.700195 11782.349609 11601.519531 11642.469727 173590000 11642.469727
3 2008-08-13 11632.809570 11633.780273 11453.339844 11532.959961 182550000 11532.959961
4 2008-08-14 11532.070312 11718.280273 11450.889648 11615.929688 159790000 11615.929688
... ... ... ... ... ... ... ...
1984 2016-06-27 17355.210938 17355.210938 17063.080078 17140.240234 138740000 17140.240234
1985 2016-06-28 17190.509766 17409.720703 17190.509766 17409.720703 112190000 17409.720703
1986 2016-06-29 17456.019531 17704.509766 17456.019531 17694.679688 106380000 17694.679688
1987 2016-06-30 17712.759766 17930.609375 17711.800781 17929.990234 133030000 17929.990234
1988 2016-07-01 17924.240234 18002.380859 17916.910156 17949.369141 82160000 17949.369141

1989 rows × 7 columns

In [4]:
data0=data[['Date','Open']]
data0['Open 7-day']=data0['Open'].rolling(window=7).mean()
C:\Users\krudko\AppData\Local\Temp\ipykernel_1128\1706044499.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data0['Open 7-day']=data0['Open'].rolling(window=7).mean()
In [5]:
fig=make_subplots(specs=[[{"secondary_y":False}]])
fig.add_trace(go.Scatter(x=data0['Date'],y=data0['Open 7-day'],name='Open mean 7-day'),secondary_y=False,)
fig.update_layout(autosize=False,width=700,height=500,title_text="DJIA Stocks Open mean 7-day")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Price",secondary_y=False)
fig.show()
In [6]:
col0=data0.columns.to_list()
col1=col0+['O7d 7d-before','slope']
data1=pd.DataFrame(columns=col1)
data1[col0]=data0
data1
Out[6]:
Date Open Open 7-day O7d 7d-before slope
0 2008-08-08 11432.089844 NaN NaN NaN
1 2008-08-11 11729.669922 NaN NaN NaN
2 2008-08-12 11781.700195 NaN NaN NaN
3 2008-08-13 11632.809570 NaN NaN NaN
4 2008-08-14 11532.070312 NaN NaN NaN
... ... ... ... ... ...
1984 2016-06-27 17355.210938 17753.751395 NaN NaN
1985 2016-06-28 17190.509766 17676.190011 NaN NaN
1986 2016-06-29 17456.019531 17636.068638 NaN NaN
1987 2016-06-30 17712.759766 17619.701451 NaN NaN
1988 2016-07-01 17924.240234 17632.782924 NaN NaN

1989 rows × 5 columns

In [7]:
n=len(data0)
for  i in range(n):    
    pmi=data1['Open 7-day'][i]
    data1.loc[i+7,'O7d 7d-before']=pmi
data1
Out[7]:
Date Open Open 7-day O7d 7d-before slope
0 2008-08-08 11432.089844 NaN NaN NaN
1 2008-08-11 11729.669922 NaN NaN NaN
2 2008-08-12 11781.700195 NaN NaN NaN
3 2008-08-13 11632.809570 NaN NaN NaN
4 2008-08-14 11532.070312 NaN NaN NaN
... ... ... ... ... ...
1991 NaT NaN NaN 17753.751395 NaN
1992 NaT NaN NaN 17676.190011 NaN
1993 NaT NaN NaN 17636.068638 NaN
1994 NaT NaN NaN 17619.701451 NaN
1995 NaT NaN NaN 17632.782924 NaN

1996 rows × 5 columns

In [8]:
data1['slope']=(data1['Open 7-day']-data1['O7d 7d-before'])/7
data2=data1[['Date','Open 7-day','O7d 7d-before','slope']]
data3=data2[14:-7]

data3
Out[8]:
Date Open 7-day O7d 7d-before slope
14 2008-08-28 11444.291574 11632.171456 -26.839983
15 2008-08-29 11496.761579 11577.352958 -11.513054
16 2008-09-02 11515.390067 11525.00014 -1.372868
17 2008-09-03 11526.707171 11495.568778 4.448342
18 2008-09-04 11513.320034 11509.014509 0.615075
... ... ... ... ...
1984 2016-06-27 17753.751395 17812.551618 -8.400032
1985 2016-06-28 17676.190011 17784.198661 -15.429807
1986 2016-06-29 17636.068638 17750.897042 -16.404058
1987 2016-06-30 17619.701451 17734.969866 -16.466916
1988 2016-07-01 17632.782924 17735.279855 -14.642419

1975 rows × 4 columns

In [9]:
fig=make_subplots(specs=[[{"secondary_y":True}]])
fig.add_trace(go.Scatter(x=data3['Date'],y=data3['slope'],name='slope'),secondary_y=False,)
fig.update_layout(autosize=False,width=700,height=500,title_text="DJIA Stocks Slope change")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="Slope",secondary_y=False)
fig.update_yaxes(title_text="Slope change",secondary_y=True)
fig.show()
In [10]:
news=pd.read_csv('RedditNews.csv')
news['Date']=pd.to_datetime(news['Date'])
news=news.sort_values('Date').reset_index(drop=True)
news
Out[10]:
Date News
0 2008-06-08 b'Marriage, they said, was reduced to the stat...
1 2008-06-08 b'Nim Chimpsky: The tragedy of the chimp who t...
2 2008-06-08 b"Canada: Beware slippery slope' to censorship...
3 2008-06-08 b'EU Vice-President Luisa Morgantini and the I...
4 2008-06-08 b"Israeli minister: Israel will attack Iran if...
... ... ...
73603 2016-07-01 Venezuela, where anger over food shortages is ...
73604 2016-07-01 A Hindu temple worker has been killed by three...
73605 2016-07-01 Ozone layer hole seems to be healing - US &amp...
73606 2016-07-01 Taiwanese warship accidentally fires missile t...
73607 2016-07-01 A 117-year-old woman in Mexico City finally re...

73608 rows × 2 columns

In [11]:
data3[data3['slope']<-120]
Out[11]:
Date Open 7-day O7d 7d-before slope
44 2008-10-10 9836.432896 10867.217215 -147.254903
45 2008-10-13 9498.844308 10840.052874 -191.601224
46 2008-10-14 9342.417132 10787.75865 -206.47736
47 2008-10-15 9196.615793 10715.665737 -217.007135
48 2008-10-16 8999.704381 10563.720006 -223.430804
49 2008-10-17 8933.721401 10320.521484 -198.114298
50 2008-10-20 8875.237026 10161.96582 -183.818399
51 2008-10-21 8974.398438 9836.432896 -123.14778
756 2011-08-09 11665.772880 12514.731445 -121.279795
757 2011-08-10 11534.884347 12454.324219 -131.348553
758 2011-08-11 11334.895787 12369.332729 -147.776706
759 2011-08-12 11231.998605 12252.76423 -145.823661
760 2011-08-15 11142.854213 12153.012835 -144.308375
1775 2015-08-27 16592.648437 17453.345703 -122.956752
1776 2015-08-28 16469.888393 17472.425781 -143.219627
1777 2015-08-31 16367.988281 17436.958705 -152.710061
1778 2015-09-01 16301.893973 17380.924386 -154.147202
1779 2015-09-02 16244.551060 17246.368582 -143.116789
1871 2016-01-14 16575.117048 17504.552734 -132.776527
1872 2016-01-15 16460.759905 17441.919922 -140.165717
1873 2016-01-19 16335.201451 17349.448382 -144.892419
1874 2016-01-20 16259.527204 17202.562779 -134.719368
In [12]:
news1=news[news['Date']=='2008-10-08']['News'].tolist()
for i in range(len(news1)):
    print(news1[i][1:])
    print()
'NYTimes: U.S. Inquiry Is Said to Conclude 30 Civilians Died in Afghan Raid '

'Modern slavery in Dubai'

'Brazil and Argentina, two biggest economies in South America, Monday launched a new payment system of bilateral transaction with their local currencies, aimed at eliminating the U.S. dollar '

"The British government's plan for teachers to monitor their pupils for signs of potential extremism only stifles debate and encourages secrecy."

'Attack on U.S. Embassy in Yemen Linked to Israeli Mossad'

'Russian President Dmitry Medvedev has started his own video blog '

"China's super-wealthy lose a third of their wealth over night"

'Chinese Space Walk Filmed in Water'

'UK announces 50bn ($88bn) rescue plan'

'14-year-old Iowa girl abandoned under Nebraska law'

' The Matrix of Death. A New Dossier on the (Im)Precision of U.S Bombing and the (Under)Valuation of an Afghan Life'

'Russia to cut military personnel to 1 mln by 2012'

'A bunch of multinationals have figured out how to make their pollution-based businesses seem like the solution to the climate crisis.'

'World economic crisis deepens: Nikkei sinks 9%, Russian exchange was shut down after a huge decline at the open, U.K. bailing out banks'

"Anger over Baader-Meinhof Biopic: Victims' Families in Uproar over New German Terrorism Film"

'Is CNN Biased In This Presidential Race? I have been watching and testing'

'China reluctant to reveal tainted milk figures'

'There Will Be No Lasting Peace without the Taliban'

"Hindu mobs in Indian province burning and killing Christians and Muslims...why isn't this in the news?"

"English woman fights to use dead husband's sperm"

'UK banks are now part nationalised as Government injects 500 billion'

'The latest conflict simmering between Lebanon and Israel is all about food: Lebanese businessmen accusing Israel of stealing traditional Middle Eastern dishes like hummus'

'Why are Afghan criminal networks secretly stockpiling enough heroin to supply every junkie on the planet? And where is it?'

'The assassination of Salvador Vergara Cruz, mayor of a Mexican resort town, may represent a turning point in how Mexico deals with its drug trafficking problem.'

'China: No more Western religious music in concert halls.'

In [13]:
news1=news[news['Date']=='2008-10-09']['News'].tolist()
for i in range(len(news1)):
    print(news1[i][1:])
    print()
'"Sovietology, like paranoia, is a very dangerous disease, and it is a pity that part of the U.S. administration still suffers from it," Medvedev said.'

'Jews were never exiled from the Holy Land, most of todays Jews have no historical connection to "Israel"; the only political solution to conflict with the Palestinians is to abolish the Jewish state.'

'Prior to the tyrannical theft of Palestine  via the Balfour Declaration issued to Lord Rothschild  the Middle East had been a relatively peaceful area.'

'Icelandic Regulator Takes Control of Kaupthing Bank '

'Russian president Dmitry Medvedev calls on European leaders to create a new world order that minimises the role of the US'

'China milk victims may have doubled to over 90,000'

'U.K. Uses Anti-Terrorism Law to Seize Icelandic Bank Assets  '

'Mexico: Government agents killed 4 to 6 people in Chiapas, 3 of them mafia execution-style. Residents respond by briefly capturing 77 government agents.'

'A 14 year old schoolboy posed as a female British secret service spy in an internet chatroom to persuade a 16 year old friend to try to murder him'

'The Bush administration this month is quietly cutting off birth control supplies to some of the worlds poorest women in Africa.'

'Enraged neighbors briefly captured 77 police officers using nothing but sticks and traditional machetes.'

'From Germany to Guantanamo: The Career of Prisoner No. 760 believed to have provided aid to the Sept. 11 attackers, and the confessions extracted from him by torture could collapse in court. '

'IMF: World on Brink of Recession'

'Military Justifies Attack That Killed at Least 33 Afghan Civilians (including 12 children)'

"Why Iceland's economy collapsed"

' North Korea said to be deploying missiles'

'Blast rocks Pakistan capital: At least 12 people are feared dead in a suspected car bombing at police headquarters'

'U.S. report warns of crisis in Afghanistan'

'Russia Pulls Out From Georgia Buffer Zones'

'You too can stranglehold your opponents like Vladimir Putin ...'

'Terror law used for Iceland deposits'

'New World Order: Global co-operation, nationalisation and state intervention - all in one day'

'UK uses anti-terror laws against Icelandic bank'

"Britain 'could be mining landfill for gold in a decade'"

'German law professor who almost stopped the EU: Elite wants world government '

In [14]:
news1=news[news['Date']=='2008-10-10']['News'].tolist()
for i in range(len(news1)):
    print(news1[i][1:])
    print()
'Equipped with knives, sticks and clubs, they all had one purpose: to do harm to Arabs for being Arabs.'

'Asian stock markets prunge over 10 percent on news that the sky is falling'

'UK accuses Iceland of economic terrorism: Seizes assets'

"Iran's 'Nuclear Detonators' Are A CIA Fake"

'Yom Kippur: Jews Riot in Acre Over Arab Driving A Car '

'Police faced off against hundreds of Jewish rioters chanting "death to Arabs" and trying to block the city\'s main thoroughfare'

'Nobel peace price for 2008 to Martti Ahtisaari'

'French President Nicolas Sarkozy: Lets put things clearly. There was a Georgian military aggression. That was a mistake. But the Russian armys reaction was disproportionate'

'Iceland has gone bankrupt. They are likely going to ask the International Monetary Fund for help and ditch the krona.'

'Icelandic Currency (ISK) loses 73% of its value in a single day. Yesterday, 1 USD = 92 ISK. Today, 1 USD = 418 ISK. '

'Oops! "Jewish People" is a Myth, says Jewish Historian '

"Nature loss 'dwarfs bank crisis'"

'Pakistan in crisis on mission for US funding'

'Banned from Sumo for life for smoking marijuana? Japan even more uptight about pot than US'

'Bulgarian roses will be planted in France'

'Mullah Omar: U.S. Should Withdraw Now Or Meet The Same Fate As The Soviets'

'Congressional leaders Harry Reid and Nancy Pelosi urged President Bush Thursday to call an emergency meeting with the G8 to address international financial instability.'

'Inflammatory Republican rallies raise concerns'

'Rioting between Jews in Arabs has resumed in Akko after a Yom Kippur filled with violence in the racially mixed city'

"Two of this week's Nobel Prize winners talk about how the destruction of Hiroshima and Nagasaki changed their lives."

'Meanwhile, shares on the Tehran stock exchange have increased in value by 20% during the year.  (Of course, the majority of their eggs are in the oil basket....)'

'Oh My! Whatever Will The Oil Barons &amp; Ministers Do? We need a rally for OPEC!!'

'"Sources inside the (World ) bank confirm that servers in the institution\'s highly-restricted treasury unit were deeply penetrated with spy software last April."'

'FTSE plunges 440pts in 10 minutes as markets around the world go into freefall again'

'Libya is to withdraw all its assets from Swiss banks, estimated at $7bn'

In [ ]: